*Code to prepare the Stata dataset for the WDL paper so it can be imported into R
*Last updated: 3-26-2021 by Matt Johnson
* Project root and the directories derived from it. The root is machine-specific:
* edit it to point at your own copy of the replication folder (keep the trailing slash).
global project "C:/Users/msj22/Dropbox/research/wrongful_discharge/replication_May_2022/"
global replication_rawdata "${project}data/"
global replication_cleandata "${project}intermediate_dtas/"


	* Load the input dataset from the intermediate_dtas folder. The path is quoted so
	* that the command still parses when the project root contains spaces.
	use "${replication_cleandata}osha_nsc2.dta", clear
	

*clean up the main dataset we are using, to import a smaller dataset into R
	* Keep years up to and including 2005 (rows with missing year are dropped too,
	* because Stata treats missing as larger than any number), and sector 1 only.
	keep if year<=2005
	keep if sector==1 
	
	* Keep only the identifiers, outcomes, treatment variables, weights and controls
	* that are exported; everything else is dropped from memory.
	keep state2 state_code year division year_div state_run_office ln_acc_rateY_matt acc_rateY_matt wdlapY wdlacY wdlagY wdlap wdlac wdlag  wb wc ln_death_rate_matt dem_gov sh_emp1979 sector sh_emp1970 
	
*These are the main regressions we run. The code below should make clear what all the pieces of the simple diff-in-diff-in-diff are
	*Note: our main treatment of interest is wdlapY: a measure of whether a state had adopted the "public policy exception" to at-will employment
	*In the year of adoption, this variable is equal to the fraction of the year in which the treatment was in place. 
	*If for Callaway and Sant'Anna, you need a purely 0/1 treatment, you can use wdlap 
	
	
	*Analytic weights (sh_emp1979) applied to every regression below
	global weight "[aw = sh_emp1979]"

	*Our main regressions use data from OSHA, and require a particular sample condition. Make sure to include this sample condition in the estimation in R
	*The condition keeps years >= 1979 where state_run_office != 1, years >= 1992 where state_run_office == 1, and sector 1 only
		global sample "((state_run_office!=1 & year>=1979) | (state_run_office==1 & year>=1992)) & sector == 1"

	*Fixed effects common to all three specifications
		global fes "year state_code "

		*Basic: state + year FE 
			reghdfe ln_acc_rateY_matt wdlapY $weight ///
				if $sample, ///
				absorb($fes) vce(cluster state_code)
*			outreg2 using result, tex replace addtext(State fixed effect, Yes, State trend, No, Division-year fixed effect, No)

		* state + division-year FE
			reghdfe ln_acc_rateY_matt wdlapY $weight ///
				if $sample, ///
				absorb($fes year_div) vce(cluster state_code)
*				outreg2 using result, tex append addtext(State fixed effect, Yes, State trend, No, Division-year fixed effect, Yes)

		*state + division-year FE + state trends (linear year slope per state_code)
			reghdfe ln_acc_rateY_matt wdlapY $weight ///
				if $sample, ///
				absorb($fes year_div c.year#i.state_code) vce(cluster state_code)
*				outreg2 using result, tex append addtext(State fixed effect, Yes, State trend, Yes, Division-year fixed effect, Yes)

	
			
 * Extra step: record, for each state, the first year in which each treatment is observed
		* For every treatment variable this generates two new state-constant variables:
		*   first_year_X      - earliest year with X > 0 (and non-missing), i.e. any exposure
		*   first_year_full_X - earliest year with X == 1
		* Both are set to 0 for states that never meet the condition in the data.

		foreach trt of varlist wdlap wb wc {
			tempvar yr_any yr_full

			* Year on rows with any exposure; missing elsewhere. The explicit
			* !missing() guard is needed because Stata ranks missing above 0.
			generate `yr_any' = year if `trt' > 0 & !missing(`trt')
			egen first_year_`trt' = min(`yr_any'), by(state_code)
			replace first_year_`trt' = 0 if missing(first_year_`trt')

			* Year on rows where the treatment equals exactly 1; missing elsewhere.
			generate `yr_full' = year if `trt' == 1
			egen first_year_full_`trt' = min(`yr_full'), by(state_code)
			replace first_year_full_`trt' = 0 if missing(first_year_full_`trt')

			* Scratch variables are not part of the exported dataset.
			drop `yr_any' `yr_full'
		}
		
/*		
		* net from https://www.sealedenvelope.com/
			by state_code (year), sort: gen byte first = sum(wdlap) == 1  & sum(wdlap[_n - 1]) == 0  
			gen firstyear= year if first==1
			
			xtset state_code
			xfill firstyear, i(state_code)
			replace firstyear=0 if firstyear==.


		
		*First year of the WB and WC statutes 
			gen zz = year if wb==1 
			egen firstyear_wb = min(zz), by(state_code)
			replace firstyear_wb = 0 if firstyear_wb ==.
			drop zz 
			
			gen zz = year if wc==1 
			egen firstyear_wc = min(zz), by(state_code)
			replace firstyear_wc = 0 if firstyear_wc ==.
			drop zz 
*/		
		* Write the trimmed dataset (with the first-treatment-year variables) for use in R.
		* The path is quoted so the command still parses when the project root contains spaces.
		save "${replication_cleandata}osha_nsc2_for_cs_estimator.dta", replace
